* ---------------------------------------------------------------------------
* Session setup: start from an empty session, disable output paging, move to
* the project directory and open a fresh log (any log left open is closed).
* ---------------------------------------------------------------------------
clear all
set more off
cd "/homes/nber/yasenov"

capture log close
log using "sum_stats", replace

* Load the matched file and retain only records flagged border1930 == 1.
use "/disk/homedirs/nber/yasenov/matched_30_40.dta"
keep if border1930 == 1

*************************
*** SAMPLE SELECTION ****
*************************
* Age distribution before the age restriction is applied.
summarize age_A

* Ages 18 through 55 inclusive; inrange() is false for missing ages, exactly
* as the two-sided comparison is.
keep if inrange(age_A, 18, 55)

* Birthplace codes below 15000, plus the three listed 900xx codes.
keep if bpl_A < 15000 | inlist(bpl_A, 90011, 90021, 90022)

* Remove anyone whose mother's or father's birthplace code equals 20000.
drop if inlist(20000, mbpl_A, fbpl_A)

* Overview of the selected sample.
summarize
describe

****************************
*** OCCUPATION GROUPINGS ***
****************************
* Collapse the detailed occ1950 codes of both waves (suffix A and suffix B)
* into eleven broad groups. Each group is stored under the code that is
* assigned below (0, 100, ..., 910, 999).
local yrs A B
foreach wave of local yrs {
	local occ occ1950_`wave'

	replace `occ' = 0   if inrange(`occ', 0, 99)
	replace `occ' = 100 if inrange(`occ', 100, 123)
	replace `occ' = 200 if inrange(`occ', 200, 290)
	replace `occ' = 300 if inrange(`occ', 300, 390)
	replace `occ' = 400 if inrange(`occ', 400, 490)
	replace `occ' = 500 if inrange(`occ', 500, 595)
	replace `occ' = 600 if inrange(`occ', 600, 690)
	replace `occ' = 700 if inrange(`occ', 700, 790)
	replace `occ' = 810 if inrange(`occ', 810, 840)
	replace `occ' = 910 if inrange(`occ', 910, 970)

	* NOTE(review): in Stata a missing value compares greater than any
	* number, so missing occupations are also set to 999 here - confirm
	* this is intended.
	replace `occ' = 999 if `occ' > 970

	tabulate `occ', missing
}

* Value labels for the grouped codes, built up one entry at a time.
label define vasocc 0 "Professional, Technical"
label define vasocc 100 "Farmers", add
label define vasocc 200 "Managers, Officials, and Proprietors", add
label define vasocc 300 "Clerical and Kindred", add
label define vasocc 400 "Sales workers", add
label define vasocc 500 "Craftsmen", add
label define vasocc 600 "Operatives", add
label define vasocc 700 "Service Workers", add
label define vasocc 810 "Farm Laborers", add
label define vasocc 910 "Laborers", add
label define vasocc 999 "Not yet classified", add
label values occ1950_A occ1950_B vasocc

* Skill indicators: the un-suffixed variables are built from occ1950_A and
* the *1940 variables from occ1950_B. Code 999 belongs to neither list, so
* those records are 0 on both the low- and the high-skilled indicator.
local low_codes  700, 810, 910
local high_codes 0, 100, 200, 300, 400, 500, 600

gen byte low_skilled      = inlist(occ1950_A, `low_codes')
gen byte low_skilled1940  = inlist(occ1950_B, `low_codes')
gen byte high_skilled     = inlist(occ1950_A, `high_codes')
gen byte high_skilled1940 = inlist(occ1950_B, `high_codes')

* Cross-checks: low vs. high within each wave, then each indicator across
* the two waves.
tabulate low_skilled high_skilled, missing
tabulate low_skilled1940 high_skilled1940, missing
tabulate low_skilled*, missing
tabulate high_skilled*, missing

************************
*** 1910 BOUNDARIES ****
************************
* Build a combined state+county identifier for each wave (state code * 1000
* plus the county code divided by 10), then fold a list of county codes into
* the parent code they are assigned to below, so that both waves use one
* consistent set of county units.
* NOTE(review): the section title suggests the targets are 1910-era county
* boundaries - confirm against the source used to build this crosswalk.

gen county = (statefip_A * 1000) + (county_A / 10)
* A county code that is not a multiple of 10 leaves a fractional identifier;
* the one case handled here (41060.5) is assigned to 41061.
replace county=41061 if county==41060.5

gen county40 = (statefip_B * 1000) + (county_B / 10)
replace county40=41061 if county40==41060.5

foreach j of varlist county county40 {

	* Arizona
	replace `j' = 4009 if `j' == 4011

	* Louisiana
	replace `j' = 22097 if `j' == 22039
	replace `j' = 22019 if `j' == 22011 | `j' == 22003 | `j' == 22053

	* New Mexico
	replace `j' = 35017 if `j' == 35023
	replace `j' = 35019 if `j' == 35011
	replace `j' = 35005 if `j' == 35025

	* Nevada
	replace `j' = 32013 if `j' == 32027
	replace `j' = 32009 if `j' == 32021

	* Oklahoma
	replace `j' = 40031 if `j' == 40033

	* Oregon
	replace `j' = 41013 if `j' == 41031 | `j' == 41017

	* Texas
	replace `j' = 48141 if `j' == 48229 | `j' == 48109
	replace `j' = 48137 if `j' == 48385
	* Fixed: the second code used to read 42849, which carries state prefix
	* 42 and therefore could never match a Texas (48xxx) record; the intended
	* code is 48249.
	replace `j' = 48355 if `j' == 48273 | `j' == 48249
	replace `j' = 48061 if `j' == 48489
	replace `j' = 48427 if `j' == 48407

	* Utah
	replace `j' = 49047 if `j' == 49009
	replace `j' = 49051 if `j' == 49013
}

**************
*** MERGE ****
**************
* Attach county-level aggregates and occupation-level wages to the person
* records. Only master (person) observations may survive each merge:
* using-only rows would otherwise be appended as empty observations and
* enter the summary statistics computed afterwards.

* County aggregates. keep(1 3) discards using-only counties at merge time,
* so the _merge == 2 tabulation below is expected to report no observations.
merge m:1 county using "aggregate_nber.dta", keep(1 3) 
tab county if _merge == 1, sort
tab county if _merge == 2, sort
drop _merge

drop sea statefip pop30
compress

* The occupation variables are renamed temporarily so that they can serve as
* merge keys for the two wage files; the original names are restored below.
rename (occ1950_A occ1950_B) (occ19501930 occ19501940)

* Wave-A occupation wages. Unmatched codes on either side are tabulated for
* inspection, then using-only rows are removed (previously they were left in
* the data as extra observations).
merge m:1 occ19501930 using "occ_wage_1930.dta"
tab occ19501930 if _merge == 1, sort
tab occ19501930 if _merge == 2, sort
drop if _merge == 2
drop _merge

* Wave-B occupation wages, handled the same way.
merge m:1 occ19501940 using "occ_wage_1940.dta"
tab occ19501940 if _merge == 1, sort
tab occ19501940 if _merge == 2, sort
drop if _merge == 2
drop _merge
rename (occ19501930 occ19501940) (occ1950_A occ1950_B)

* Blank the wage for occupation codes above 970. Missing occupation values
* also satisfy "> 970" in Stata and are blanked too.
replace wage30 = . if occ1950_A > 970
replace wage40 = . if occ1950_B > 970

***************************
*** SUMMARY STATISTICS ****
***************************
* Demographic indicators built from the wave-A variables. Each is 1 when the
* source variable equals the listed code(s) and 0 otherwise.
* NOTE(review): a missing source value evaluates to 0 rather than missing in
* these expressions - confirm that is acceptable for the reported means.
gen black    = (race_A == 200)
gen married  = inlist(marst_A, 1, 2)
gen male     = 1
gen literate = (lit_A == 4)
gen inschool = (school_A == 2)
gen inlf     = (labforce_A == 2)

*************
*************

* DEMOGRAPHICS ENTIRE SAMPLE
local demog age_A male black married literate inschool inlf
summarize `demog'
tabstat `demog', format(%4.3f) columns(statistics) statistics(mean n)

log close
